{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import math\n",
    "import csv\n",
    "import random\n",
    "import numpy as np\n",
    "from sklearn import linear_model\n",
    "from sklearn.model_selection import cross_val_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 当每支队伍没有elo等级分时，赋予其基础elo等级分\n",
    "base_elo = 1600\n",
    "team_elos = {} \n",
    "team_stats = {}\n",
    "X = []\n",
    "y = []\n",
    "folder = 'data' #存放数据的目录\n",
    "\n",
    "# 根据每支队伍的Miscellaneous Opponent，Team统计数据csv文件进行初始化\n",
    "def initialize_data(Mstat, Ostat, Tstat):\n",
    "    new_Mstat = Mstat.drop(['Rk', 'Arena'], axis=1)\n",
    "    new_Ostat = Ostat.drop(['Rk', 'G', 'MP'], axis=1)\n",
    "    new_Tstat = Tstat.drop(['Rk', 'G', 'MP'], axis=1)\n",
    "\n",
    "    team_stats1 = pd.merge(new_Mstat, new_Ostat, how='left', on='Team')\n",
    "    team_stats1 = pd.merge(team_stats1, new_Tstat, how='left', on='Team')\n",
    "    return team_stats1.set_index('Team', inplace=False, drop=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_elo(team):\n",
    "    try:\n",
    "        return team_elos[team]\n",
    "    except:\n",
    "        # 当最初没有elo时，给每个队伍最初赋base_elo\n",
    "        team_elos[team] = base_elo\n",
    "        return team_elos[team]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算每个球队的elo值\n",
    "def calc_elo(win_team, lose_team):\n",
    "    winner_rank = get_elo(win_team)\n",
    "    loser_rank = get_elo(lose_team)\n",
    "\n",
    "    rank_diff = winner_rank - loser_rank\n",
    "    exp = (rank_diff  * -1) / 400\n",
    "    odds = 1 / (1 + math.pow(10, exp))\n",
    "    # 根据rank级别修改K值\n",
    "    if winner_rank < 2100:\n",
    "        k = 32\n",
    "    elif winner_rank >= 2100 and winner_rank < 2400:\n",
    "        k = 24\n",
    "    else:\n",
    "        k = 16\n",
    "\n",
    "    # 更新 rank 数值\n",
    "    new_winner_rank = round(winner_rank + (k * (1 - odds)))      \n",
    "    new_loser_rank = round(loser_rank + (k * (0 - odds)))\n",
    "    return new_winner_rank, new_loser_rank"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def  build_dataSet(all_data):\n",
    "    print(\"Building data set..\")\n",
    "    X = []\n",
    "    skip = 0\n",
    "    for index, row in all_data.iterrows():\n",
    "\n",
    "        Wteam = row['WTeam']\n",
    "        Lteam = row['LTeam']\n",
    "\n",
    "        #获取最初的elo或是每个队伍最初的elo值\n",
    "        team1_elo = get_elo(Wteam)\n",
    "        team2_elo = get_elo(Lteam)\n",
    "\n",
    "        # 给主场比赛的队伍加上100的elo值\n",
    "        if row['WLoc'] == 'H':\n",
    "            team1_elo += 100\n",
    "        else:\n",
    "            team2_elo += 100\n",
    "\n",
    "        # 把elo当为评价每个队伍的第一个特征值\n",
    "        team1_features = [team1_elo]\n",
    "        team2_features = [team2_elo]\n",
    "\n",
    "        # 添加我们从basketball reference.com获得的每个队伍的统计信息\n",
    "        for key, value in team_stats.loc[Wteam].iteritems():\n",
    "            team1_features.append(value)\n",
    "        for key, value in team_stats.loc[Lteam].iteritems():\n",
    "            team2_features.append(value)\n",
    "\n",
    "        # 将两支队伍的特征值随机的分配在每场比赛数据的左右两侧\n",
    "        # 并将对应的0/1赋给y值\n",
    "        if random.random() > 0.5:\n",
    "            X.append(team1_features + team2_features)\n",
    "            y.append(0)\n",
    "        else:\n",
    "            X.append(team2_features + team1_features)\n",
    "            y.append(1)\n",
    "\n",
    "        if skip == 0:\n",
    "            print('X',X)\n",
    "            skip = 1\n",
    "\n",
    "        # 根据这场比赛的数据更新队伍的elo值\n",
    "        new_winner_rank, new_loser_rank = calc_elo(Wteam, Lteam)\n",
    "        team_elos[Wteam] = new_winner_rank\n",
    "        team_elos[Lteam] = new_loser_rank\n",
    "\n",
    "    return np.nan_to_num(X), y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Mstat = pd.read_csv(folder + '/15-16Miscellaneous_Stat.csv')\n",
    "Ostat = pd.read_csv(folder + '/15-16Opponent_Per_Game_Stat.csv')\n",
    "Tstat = pd.read_csv(folder + '/15-16Team_Per_Game_Stat.csv')\n",
    "\n",
    "team_stats = initialize_data(Mstat, Ostat, Tstat)\n",
    "\n",
    "result_data = pd.read_csv(folder + '/2015-2016_result.csv')\n",
    "X, y = build_dataSet(result_data)\n",
    "\n",
    "# 训练网络模型\n",
    "print(\"Fitting on %d game samples..\" % len(X))\n",
    "\n",
    "model = linear_model.LogisticRegression()\n",
    "model.fit(X, y)\n",
    "\n",
    "#利用10折交叉验证计算训练正确率\n",
    "print(\"Doing cross-validation..\")\n",
    "print(cross_val_score(model, X, y, cv = 10, scoring='accuracy', n_jobs=-1).mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_winner(team_1, team_2, model):\n",
    "    features = []\n",
    "\n",
    "    # team 1，客场队伍\n",
    "    features.append(get_elo(team_1))\n",
    "    for key, value in team_stats.loc[team_1].iteritems():\n",
    "        features.append(value)\n",
    "\n",
    "    # team 2，主场队伍\n",
    "    features.append(get_elo(team_2) + 100)\n",
    "    for key, value in team_stats.loc[team_2].iteritems():\n",
    "        features.append(value)\n",
    "\n",
    "    features = np.nan_to_num(features)\n",
    "    return model.predict_proba([features])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Predicting on new schedule..')\n",
    "schedule1617 = pd.read_csv(folder + '/16-17Schedule.csv')\n",
    "result = []\n",
    "for index, row in schedule1617.iterrows():\n",
    "    team1 = row['Vteam']\n",
    "    team2 = row['Hteam']\n",
    "    pred = predict_winner(team1, team2, model)\n",
    "    prob = pred[0][0]\n",
    "    if prob > 0.5:\n",
    "        winner = team1\n",
    "        loser = team2\n",
    "        result.append([winner, loser, prob])\n",
    "    else:\n",
    "        winner = team2\n",
    "        loser = team1\n",
    "        result.append([winner, loser, 1 - prob])\n",
    "\n",
    "with open('16-17Result.csv', 'w') as f:\n",
    "    writer = csv.writer(f)\n",
    "    writer.writerow(['win', 'lose', 'probability'])\n",
    "    writer.writerows(result)\n",
    "    print('done.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.read_csv('16-17Result.csv',header=0)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
